library(data.table)
library(tidyr)
library(maps)
library(haven)
library(ggplot2)
library(dplyr)
library(readxl)
library(ggrepel)
library(wordcloud)
library(lme4)
library(lmerTest)
PREPARATION DATASETS FOR ANALYSIS #################### #################### ####################
PREP THE DATASET FOR ANALYSIS WVS 5 & 6 ####################
#read the data (Wave 5)
# Data of Wave 5
WV5_data <- readRDS("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/WV6_dataset_wave_5_6/F00007944-WV5_Data_R_v20180912.rds")
# Convert WV5_data-object in data.frame
WV5_data_df <- as.data.frame(WV5_data)
# show first five columns
WV5_data_df
#rename the variables
WV5_data <- WV5_data_df %>%
rename(gender = V235, age = V237, country_code = V2, wave = V1, risktaking = V86, children = V56, married = V55, employed = V241, education = V238)
WV5_data
colnames(WV5_data)
[1] "wave" "V1A" "V1B" "country_code" "V2A" "V3" "V4"
[8] "V4_CO" "V5" "V5_CO" "V6" "V6_CO" "V7" "V7_CO"
[15] "V8" "V8_CO" "V9" "V9_CO" "V10" "V11" "V12"
[22] "V13" "V14" "V15" "V16" "V17" "V18" "V19"
[29] "V20" "V21" "V22" "V23" "V24" "V25" "V26"
[36] "V27" "V28" "V29" "V30" "V31" "V32" "V33"
[43] "V34" "V35" "V36" "V37" "V38" "V39" "V40"
[50] "V41" "V42" "V43" "V43_01" "V43_02" "V43_03" "V43_04"
[57] "V43_05" "V43_06" "V43_07" "V43_08" "V43_09" "V43_10" "V43_11"
[64] "V43_12" "V43_13" "V43_14" "V43_15" "V43_16" "V43_17" "V43_18"
[71] "V43_19" "V43_20" "V43_21" "V43_22" "V43_23" "V43_24" "V43_25"
[78] "V43_26" "V43_27" "V43_28" "V43_29" "V43_30" "V44" "V45"
[85] "V46" "V47" "V48" "V49" "V50" "V51" "V52"
[92] "V53" "V54" "married" "children" "V57" "V58" "V59"
[99] "V60" "V61" "V62" "V63" "V64" "V65" "V66"
[106] "V67" "V68" "V69" "V69_HK" "V70" "V70_HK" "V71"
[113] "V72" "V73" "V73_HK" "V74" "V74_HK" "V75" "V76"
[120] "V77" "V78" "V79" "V80" "V81" "V82" "V83"
[127] "V84" "V85" "risktaking" "V87" "V88" "V89" "V90"
[134] "V91" "V92" "V93" "V94" "V95" "V96" "V97"
[141] "V98" "V99" "V100" "V101" "V102" "V103" "V104"
[148] "V105" "V106" "V107" "V108" "V109" "V110" "V111"
[155] "V112" "V113" "V114" "V115" "V116" "V117" "V118"
[162] "V119" "V120" "V121" "V122" "V123" "V124" "V125"
[169] "V126" "V127" "V128" "V129" "V130" "V130_CA_1" "V130_IQ_1"
[176] "V130_IQ_2" "V130_IQ_3" "V130_IQ_4" "V130_NZ_1" "V130_NZ_2" "V131" "V132"
[183] "V133" "V134" "V135" "V136" "V137" "V138" "V139"
[190] "V140" "V141" "V142" "V143" "V144" "V145" "V146_00"
[197] "V146_01" "V146_02" "V146_03" "V146_04" "V146_05" "V146_06" "V146_07"
[204] "V146_08" "V146_09" "V146_10" "V146_11" "V146_12" "V146_13" "V146_14"
[211] "V146_15" "V146_16" "V146_17" "V146_18" "V146_19" "V146_20" "V146_21"
[218] "V146_22" "V147" "V148" "V149" "V150" "V151" "V151_IQ_A"
[225] "V151_IQ_B" "V152" "V153" "V154" "V155" "V156" "V157"
[232] "V158" "V159" "V160" "V161" "V162" "V163" "V164"
[239] "V165" "V166" "V167" "V168" "V169" "V170" "V171"
[246] "V172" "V173" "V174" "V175" "V176" "V177" "V178"
[253] "V179" "V180" "V181" "V182" "V183" "V184" "V185"
[260] "V186" "V187" "V188" "V189" "V190" "V191" "V192"
[267] "V193" "V194" "V195" "V196" "V197" "V198" "V199"
[274] "V200" "V201" "V202" "V203" "V204" "V205" "V206"
[281] "V207" "V208" "V209" "V210" "V211" "V212" "V213A"
[288] "V213B" "V213C" "V213D" "V213E" "V213F" "V213G" "V213H"
[295] "V213K" "V213L" "V213M" "V213N" "V214" "V215" "V216"
[302] "V217" "V218" "V219" "V220" "V221" "V222" "V223"
[309] "V224" "V225" "V226" "V227" "V228" "V229" "V230"
[316] "V231" "V232" "V233" "V233A" "V234" "gender" "V236"
[323] "age" "education" "V238CS" "V239" "V240" "employed" "V242"
[330] "V242A_CO" "V243" "V244" "V245" "V246" "V247" "V248"
[337] "V249" "V250" "V251" "V252" "V252B" "V253" "V253CS"
[344] "V254" "V255" "V255CS" "V256" "V257" "V257B" "V257C"
[351] "V258" "V259" "V259A" "V260" "V261" "V262" "V263"
[358] "V264" "V265" "S024" "S025" "Y001" "Y002" "Y003"
[365] "SACSECVAL" "SECVALWGT" "RESEMAVAL" "WEIGHTB" "I_AUTHORITY" "I_NATIONALISM" "I_DEVOUT"
[372] "DEFIANCE" "WEIGHT1A" "I_RELIGIMP" "I_RELIGBEL" "I_RELIGPRAC" "DISBELIEF" "WEIGHT2A"
[379] "I_NORM1" "I_NORM2" "I_NORM3" "RELATIVISM" "WEIGHT3A" "I_TRUSTARMY" "I_TRUSTPOLICE"
[386] "I_TRUSTCOURTS" "SCEPTICISM" "WEIGHT4A" "I_INDEP" "I_IMAGIN" "I_NONOBED" "AUTONOMY"
[393] "WEIGHT1B" "I_WOMJOB" "I_WOMPOL" "I_WOMEDU" "EQUALITY" "WEIGHT2B" "I_HOMOLIB"
[400] "I_ABORTLIB" "I_DIVORLIB" "CHOICE" "WEIGHT3B" "I_VOICE1" "I_VOICE2" "I_VOI2_00"
[407] "VOICE" "WEIGHT4B" "S001" "S007" "S018" "S019" "S021"
[414] "COW"
#select only the variables of interest
WV5_data <- WV5_data %>%
dplyr::select(gender, age, country_code, wave, risktaking, children, employed, education, married)
WV5_data
countrynames <- read.csv("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/WV6_dataset_wave_5_6/countrynames.txt", header = FALSE, as.is = TRUE)
colnames(countrynames) <- c("code", "name")
# Assuming WV5_data has a column named country_code
WV5_data$country <- countrynames$name[match(WV5_data$country_code, countrynames$code)]
# Check the frequency of each country in the new column
table(WV5_data$country)
Andorra Argentina Australia Brazil Bulgaria Burkina Faso
1003 1002 1421 1500 1001 1534
Canada Chile China Colombia Cyprus (G) Egypt
2164 1000 1991 3025 1050 3051
Ethiopia Finland France Georgia Germany Ghana
1500 1014 1001 1500 2064 1534
Great Britain Guatemala Hong Kong Hungary India Indonesia
1041 1000 1252 1007 2001 2015
Iran Iraq Italy Japan Jordan Malaysia
2667 2701 1012 1096 1200 1201
Mali Mexico Moldova Morocco Netherlands New Zealand
1534 1560 1046 1200 1050 954
Norway Peru Poland Romania Russia Rwanda
1025 1500 1000 1776 2033 1507
Slovenia South Africa South Korea Spain Sweden Switzerland
1037 2988 1200 1200 1003 1241
Taiwan Thailand Trinidad and Tobago Turkey Ukraine United States
1227 1534 1002 1346 1000 1249
Uruguay Viet Nam Zambia
1000 1495 1500
# Display the updated WV5_data
print(WV5_data)
length(unique(WV5_data$country))
[1] 58
#Read Dataset (Wave 6)
WV6_data <- load("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/WV6_dataset_wave_5_6/WV6_Data_R_v20201117.rdata")
WV6_data <- WV6_Data_R_v20201117
print(WV6_data)
#rename variables
WV6_data <- WV6_data %>%
rename(wave = V1, gender = V240, age = V242,country_code = V2, risktaking = V76, children = V58, married = V57, employed = V229, education = V248)
#select only the variables of interest
WV6_data <- WV6_data %>%
dplyr::select(gender, age, country_code, wave, risktaking, children, employed, education, married)
WV6_data
#decode daraset (Wave 6)
countrynames = read.csv("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/WV6_dataset_wave_5_6/countrynames.txt", header=FALSE,as.is=TRUE)
colnames(countrynames) = c("code", "name")
WV6_data$country = countrynames$name [match(WV6_data$country_code, countrynames$code)]
table(WV6_data$country)
Algeria Argentina Armenia Australia Azerbaijan Belarus
1200 1030 1100 1477 1002 1535
Brazil Chile China Colombia Cyprus (G) Ecuador
1486 1000 2300 1512 1000 1202
Egypt Estonia Georgia Germany Ghana Haiti
1523 1533 1202 2046 1552 1996
Hong Kong India Iraq Japan Jordan Kazakhstan
1000 4078 1200 2443 1200 1500
Kuwait Kyrgyzstan Lebanon Libya Malaysia Mexico
1303 1500 1200 2131 1300 2000
Morocco Netherlands New Zealand Nigeria Pakistan Palestine
1200 1902 841 1759 1200 1000
Peru Philippines Poland Qatar Romania Russia
1210 1200 966 1060 1503 2500
Rwanda Singapore Slovenia South Africa South Korea Spain
1527 1972 1069 3531 1200 1189
Sweden Taiwan Thailand Trinidad and Tobago Tunisia Turkey
1206 1238 1200 999 1205 1605
Ukraine United States Uruguay Uzbekistan Yemen Zimbabwe
1500 2232 1000 1500 1000 1500
WV6_data
#combine the 2 dataset (Wave 6 + Wave 5)
WV5_data
WV6_data
WVS_data = rbind(WV5_data, WV6_data)
WVS_data
#exclusion of participants and omission of missing data (na)
WVS_data = subset(WVS_data, risktaking > 0 & gender > 0 & age >0 & education > 0 & employed > 0 & married > 0 & children >= 0)
data_Wave5 = subset(WV5_data, risktaking > 0 & gender > 0 & age >0 & education > 0 & employed > 0 & married > 0 & children >= 0)
data_Wave6 = subset(WV6_data, risktaking > 0 & gender > 0 & age >0 & education > 0 & employed > 0 & married > 0 & children >= 0)
WVS_data <- na.omit(WVS_data)
data_Wave5 <- na.omit(data_Wave5)
data_Wave6 <- na.omit(data_Wave6)
# Use the mutate function to change the country name
WVS_data <- WVS_data %>%
mutate(country = ifelse(country == "Great Britain", "United Kingdom", country))
# Neue Spalte 'education_cat' erstellen und initialisieren
WVS_data$education_cat <- NA
# Kategorien zuweisen basierend auf den Bildungsstufen
WVS_data$education_cat <- ifelse(WVS_data$education %in% c(1, 2), "incomplete or no primary education",
ifelse(WVS_data$education %in% c(3, 4, 5, 6), "No Uni",
ifelse(WVS_data$education %in% c(7, 8, 9), "Uni", NA)))
# Tabelle der neuen Kategorien anzeigen
table(WVS_data$education_cat)
incomplete or no primary education No Uni Uni
19297 69364 59866
#Transformation of item risktaking
# Transfrom risk item such that high values represent more risk taking
WVS_data$risktaking = 6 - WVS_data$risktaking + 1
# Transform risk variable into T-score (mean = 50, sd = 10)
WVS_data$T_score_risktaking = 10*scale(WVS_data$risktaking, center=TRUE,scale=TRUE)+50
WVS_data
#Transform risk variable into Z score
# Assuming T-scores have a mean of 50 and a standard deviation of 10
#WVS_data$Z_score_risktaking = (WVS_data$T_score_risktaking - 50) / 10
# Print the resulting data frame
print(WVS_data)
WVS_data <- WVS_data %>%
group_by(country) %>%
mutate(z_score_age = scale(age))
WVS_data
length(unique(WVS_data$country))
[1] 76
nrow(WVS_data) # number of individuals --> Mata et al., 2016 N = 147,118
[1] 148527
range(WVS_data$age)
[1] 15 99
table(WVS_data$gender) # sex table(data$sex)/nrow(data) --> 76,617 Female (1)
1 2
71134 77393
WVS_data$gender = ifelse(WVS_data$gender == 1, 0, 1) # sex: male vs. female
WVS_data$children = ifelse(WVS_data$children == 0, 0, 1) # children: no vs. yes
WVS_data$married = ifelse(WVS_data$married == 1, 1, 0) # married: yes vs. no
WVS_data$employed = ifelse(WVS_data$employed < 4, 1, 0) # employed: yes vs. no
WVS_data$education = ifelse(WVS_data$education < 4, 0, 1) # education: no primary vs. primary+
head(WVS_data)
PREP THE DATASET FOR ANALYSIS GPS ####################
#Add data GPS
gps_data <- haven::read_dta("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/GPS_dataset_individual_level/individual_new.dta")
head(gps_data)
gps_data <- gps_data %>%
drop_na(country, isocode, risktaking, gender, age)
# Display the cleaned data
gps_data
#select only the variables of interest
gps_data <- gps_data %>%
dplyr::select(country, isocode, ison, risktaking, gender, age)
gps_data
gps_data <- gps_data %>%
group_by(country) %>%
mutate(z_score_age = scale(age))
# Display the new column with Z-Scores per Country
gps_data
PREP THE DATASET FOR ANALYSIS HARDSHIP ####################
excel_path <- "/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/Hardship/Hardship_complete_2024.xlsx"
hardship <- read_excel(excel_path)
# Create a vector of labels with the same length as the number of columns in 'countryfacts'
labels <- c("country","mean_homicide","gdp","gini_income","Infant_mortality","life_expect","primary_female_enrollment_rate")
# Print the result
print(hardship)
# Create the 'hardship' column in the 'hardship' data frame
hardship <- hardship %>%
mutate(hardship = (mean_homicide + gdp + gini_income + Infant_mortality + life_expect + primary_female_enrollment_rate) / 6)
hardship
hardship$mean_homicide=log(hardship$mean_homicide)
hardship$gdp=log(hardship$gdp)
hardship$Infant_mortality=log(hardship$Infant_mortality)
hardship$life_expect=log(hardship$life_expect)
hardship$gini_income=log(hardship$gini_income)
hardship$primary_female_enrollment_rate=log(hardship$primary_female_enrollment_rate)
# changing variables into the same direction
# Reverse Codierung
hardship$mean_homicide=scale(hardship$mean_homicide)
hardship$gdp=scale(-hardship$gdp)
hardship$Infant_mortality=scale(hardship$Infant_mortality)
hardship$life_expect=scale(-hardship$life_expect)
hardship$gini_income=scale(hardship$gini_income)
hardship$primary_female_enrollment_rate=scale(hardship$primary_female_enrollment_rate)
hardship
hardship$hardship=(hardship$mean_homicide+hardship$gdp+hardship$gini_income+hardship$life_expect+hardship$Infant_mortality+hardship$primary_female_enrollment_rate)/6
hardship
# Berechnung der Korrelationsmatrix für den Datensatz "hardship"
correlation_hardship <- cor(hardship[, c("mean_homicide", "gdp", "gini_income", "Infant_mortality", "life_expect", "primary_female_enrollment_rate")])
# Visualisierung der Korrelationsmatrix als Heatmap
library(ggplot2)
library(reshape2)
correlation_hardship_melted <- melt(correlation_hardship)
ggplot(correlation_hardship_melted, aes(Var1, Var2, fill = value)) +
geom_tile(color = "white") +
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Correlation") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 10, hjust = 1)) +
coord_fixed()
correlation_hardship <- cor(hardship[, c("mean_homicide", "gdp", "gini_income", "Infant_mortality", "life_expect", "primary_female_enrollment_rate")])
# Erstellen einer Tabelle für die Korrelationsmatrix
correlation_table <- as.data.frame(correlation_hardship)
# Anzeigen der Tabelle
print(correlation_table)
country_check <- unique(WVS_data$country) %in% hardship$isocode
if (all(country_check)) {
print("Alle Länder in beiden Datensätzen sind konsistent.")
} else {
print("Es gibt inkonsistente Länder in den Datensätzen.")
}
[1] "Es gibt inkonsistente Länder in den Datensätzen."
#select only the variables of interest
hardship <- hardship %>%
dplyr::select(country, isocode, hardship)
hardship
PREP THE DATASET FOR ANALYSIS MIXED-MODELS ####################
#Add Hardship to WVS_data
head(WVS_data)
WVS_mixed_model <- left_join(WVS_data, hardship, by = "country")
WVS_mixed_model
gps_mixed_model <- left_join(gps_data, hardship, by = "country")
gps_mixed_model
MIXED-MODELS #################### #################### ####################
MIXED-MODELS WVS-DATA ####################
model0 = lmer(risktaking ~ 1 + (1|country),data = WVS_mixed_model)
summary_model0=summary(model0)
model1 <- lmer(risktaking ~ 1 + scale(age) + factor(gender) + (1 + scale(z_score_age) + factor(gender) | country),
data = WVS_mixed_model,
control = lmerControl(optimizer = "bobyqa"))
print(summary_model3)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: risktaking ~ 1 + scale(z_score_age) * hardship + factor(gender) *
hardship + factor(children) + factor(married) + factor(employed) +
factor(education) + (1 + scale(z_score_age) + factor(gender) +
factor(children) + factor(married) + factor(employed) + factor(education) | country)
Data: WVS_mixed_model
Control: lmerControl(optCtrl = list(maxfun = 30000), optimizer = "bobyqa")
AIC BIC logLik deviance df.resid
534717.4 535103.9 -267319.7 534639.4 148488
Scaled residuals:
Min 1Q Median 3Q Max
-2.56078 -0.78206 -0.08329 0.74284 3.14737
Random effects:
Groups Name Variance Std.Dev. Corr
country (Intercept) 0.143201 0.37842
scale(z_score_age) 0.009702 0.09850 0.21
factor(gender)1 0.021562 0.14684 -0.01 0.16
factor(children)1 0.019324 0.13901 0.02 0.06 0.08
factor(married)1 0.009997 0.09999 0.10 0.21 0.51 0.23
factor(employed)1 0.007261 0.08521 -0.01 0.08 0.03 -0.26 -0.22
factor(education)1 0.014968 0.12234 -0.21 -0.03 0.07 -0.14 0.08 0.18
Residual 2.128571 1.45896
Number of obs: 148527, groups: country, 76
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 3.51893 0.04584 76.32698 76.762 < 2e-16 ***
scale(z_score_age) -0.22218 0.01240 74.85319 -17.911 < 2e-16 ***
hardship 0.18581 0.07032 75.24809 2.642 0.010 *
factor(gender)1 -0.34504 0.01899 67.55355 -18.174 < 2e-16 ***
factor(children)1 -0.20392 0.02022 73.26248 -10.085 1.71e-15 ***
factor(married)1 -0.13353 0.01556 62.33679 -8.582 3.73e-12 ***
factor(employed)1 0.01553 0.01336 68.46002 1.163 0.249
factor(education)1 0.12451 0.01877 51.11003 6.633 2.05e-08 ***
scale(z_score_age):hardship 0.10088 0.01967 73.52851 5.128 2.30e-06 ***
hardship:factor(gender)1 0.03238 0.02798 66.21324 1.157 0.251
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) sc(__) hrdshp fctr(g)1 fctr(c)1 fctr(mr)1 fctr(mp)1 fctr(d)1 s(__):
scl(z_scr_) 0.183
hardship 0.042 0.012
fctr(gndr)1 -0.056 0.153 -0.001
fctr(chld)1 -0.046 -0.060 0.003 0.009
fctr(mrrd)1 0.049 0.136 0.000 0.361 -0.070
fctr(mply)1 -0.063 0.099 0.003 0.089 -0.190 -0.146
fctr(dctn)1 -0.293 0.042 0.008 0.053 -0.069 0.038 0.059
scl(z_sc_): 0.015 0.037 0.201 0.004 0.012 -0.001 -0.034 -0.009
hrdshp:f()1 -0.003 0.012 -0.061 0.045 -0.008 -0.004 0.021 0.000 0.098
# Zusammenfassung des Modells anzeigen
summary_model1 <- summary(model1)
# Gewünschte Werte extrahieren und formatieren
results_model1 <- data.frame(
Predictor = c("Intercept", "Age", "Gender"),
Estimate = c(summary_model1$coefficients["(Intercept)", "Estimate"],
summary_model1$coefficients["scale(age)", "Estimate"],
summary_model1$coefficients["factor(gender)1", "Estimate"]),
SE = c(summary_model1$coefficients["(Intercept)", "Std. Error"],
summary_model1$coefficients["scale(age)", "Std. Error"],
summary_model1$coefficients["factor(gender)1", "Std. Error"]),
T_score = c(summary_model1$coefficients["(Intercept)", "t value"],
summary_model1$coefficients["scale(age)", "t value"],
summary_model1$coefficients["factor(gender)1", "t value"]),
p_value = c(summary_model1$coefficients["(Intercept)", "Pr(>|t|)"],
summary_model1$coefficients["scale(age)", "Pr(>|t|)"],
summary_model1$coefficients["factor(gender)1", "Pr(>|t|)"])
)
# Formatierung der p-Werte
results_model1$p_value <- ifelse(results_model1$p_value < 0.001, "< .001", sprintf("%.3f", results_model1$p_value))
# Ergebnisse anzeigen
print(results_model1)
model2 = lmer(risktaking ~ 1+scale(z_score_age)+factor(gender) + factor(children) + factor(married) + factor(employed) + factor(education)+ (1+scale(z_score_age)+factor(gender)+ factor(children) + factor(married) + factor(employed) + factor(education)|country),data = WVS_mixed_model,control=lmerControl(optCtrl=list(maxfun=30000),optimizer="bobyqa"))
summary_model2=summary(model2)
print(summary_model2)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: risktaking ~ 1 + scale(z_score_age) + factor(gender) + factor(children) +
factor(married) + factor(employed) + factor(education) + (1 + scale(z_score_age) + factor(gender) + factor(children) +
factor(married) + factor(employed) + factor(education) | country)
Data: WVS_mixed_model
Control: lmerControl(optCtrl = list(maxfun = 30000), optimizer = "bobyqa")
REML criterion at convergence: 534701.2
Scaled residuals:
Min 1Q Median 3Q Max
-2.56914 -0.78239 -0.08366 0.74260 3.14578
Random effects:
Groups Name Variance Std.Dev. Corr
country (Intercept) 0.157684 0.39709
scale(z_score_age) 0.014546 0.12061 0.32
factor(gender)1 0.023553 0.15347 0.08 0.31
factor(children)1 0.019645 0.14016 0.09 0.19 0.11
factor(married)1 0.010191 0.10095 0.28 0.49 0.56 0.21
factor(employed)1 0.007384 0.08593 -0.04 0.02 0.01 -0.26 -0.21
factor(education)1 0.015326 0.12380 -0.17 0.05 0.10 -0.13 0.10 0.18
Residual 2.128583 1.45897
Number of obs: 148527, groups: country, 76
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 3.51389 0.04784 76.46867 73.458 < 2e-16 ***
scale(z_score_age) -0.22506 0.01474 74.40067 -15.264 < 2e-16 ***
factor(gender)1 -0.34620 0.01965 72.57294 -17.621 < 2e-16 ***
factor(children)1 -0.20501 0.02033 72.32226 -10.084 1.98e-15 ***
factor(married)1 -0.13330 0.01561 61.82874 -8.539 4.72e-12 ***
factor(employed)1 0.01620 0.01341 67.29409 1.208 0.231
factor(education)1 0.12409 0.01891 50.11921 6.562 2.88e-08 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) sc(__) fctr(g)1 fctr(c)1 fctr(mr)1 fctr(mp)1
scl(z_scr_) 0.290
fctr(gndr)1 0.025 0.277
fctr(chld)1 0.009 0.057 0.032
fctr(mrrd)1 0.175 0.339 0.397 -0.080
fctr(mply)1 -0.079 0.053 0.077 -0.191 -0.145
fctr(dctn)1 -0.257 0.089 0.073 -0.064 0.046 0.058
# Zusammenfassung des Modells anzeigen
summary_model2 <- summary(model2)
# Gewünschte Werte extrahieren und formatieren
results_model2 <- data.frame(
Predictor = c("Intercept", "Age", "Gender", "Parental status", "Marital status", "Occupational status", "Education"),
Estimate = c(summary_model2$coefficients["(Intercept)", "Estimate"],
summary_model2$coefficients["scale(z_score_age)", "Estimate"],
summary_model2$coefficients["factor(gender)1", "Estimate"],
summary_model2$coefficients["factor(children)1", "Estimate"],
summary_model2$coefficients["factor(married)1", "Estimate"],
summary_model2$coefficients["factor(employed)1", "Estimate"],
summary_model2$coefficients["factor(education)1", "Estimate"]),
SE = c(summary_model2$coefficients["(Intercept)", "Std. Error"],
summary_model2$coefficients["scale(z_score_age)", "Std. Error"],
summary_model2$coefficients["factor(gender)1", "Std. Error"],
summary_model2$coefficients["factor(children)1", "Std. Error"],
summary_model2$coefficients["factor(married)1", "Std. Error"],
summary_model2$coefficients["factor(employed)1", "Std. Error"],
summary_model2$coefficients["factor(education)1", "Std. Error"]),
T_score = c(summary_model2$coefficients["(Intercept)", "t value"],
summary_model2$coefficients["scale(z_score_age)", "t value"],
summary_model2$coefficients["factor(gender)1", "t value"],
summary_model2$coefficients["factor(children)1", "t value"],
summary_model2$coefficients["factor(married)1", "t value"],
summary_model2$coefficients["factor(employed)1", "t value"],
summary_model2$coefficients["factor(education)1", "t value"]),
p_value = c(summary_model2$coefficients["(Intercept)", "Pr(>|t|)"],
summary_model2$coefficients["scale(z_score_age)", "Pr(>|t|)"],
summary_model2$coefficients["factor(gender)1", "Pr(>|t|)"],
summary_model2$coefficients["factor(children)1", "Pr(>|t|)"],
summary_model2$coefficients["factor(married)1", "Pr(>|t|)"],
summary_model2$coefficients["factor(employed)1", "Pr(>|t|)"],
summary_model2$coefficients["factor(education)1", "Pr(>|t|)"])
)
# Formatierung der p-Werte
results_model2$p_value <- ifelse(results_model2$p_value < 0.001, "< .001", sprintf("%.3f", results_model2$p_value))
# Ergebnisse anzeigen
print(results_model2)
model3 <- lmer(risktaking ~ 1+scale(z_score_age)*hardship+factor(gender)*hardship + factor(children) + factor(married) + factor(employed) + factor(education)+ (1+scale(z_score_age)+factor(gender)+ factor(children) + factor(married) + factor(employed) + factor(education)|country),data = WVS_mixed_model,control=lmerControl(optCtrl=list(maxfun=30000),optimizer="bobyqa"),REML = FALSE)
summary_model3=summary(model3)
print(summary_model3)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: risktaking ~ 1 + scale(z_score_age) * hardship + factor(gender) *
hardship + factor(children) + factor(married) + factor(employed) +
factor(education) + (1 + scale(z_score_age) + factor(gender) +
factor(children) + factor(married) + factor(employed) + factor(education) | country)
Data: WVS_mixed_model
Control: lmerControl(optCtrl = list(maxfun = 30000), optimizer = "bobyqa")
AIC BIC logLik deviance df.resid
534717.4 535103.9 -267319.7 534639.4 148488
Scaled residuals:
Min 1Q Median 3Q Max
-2.56078 -0.78206 -0.08329 0.74284 3.14737
Random effects:
Groups Name Variance Std.Dev. Corr
country (Intercept) 0.143201 0.37842
scale(z_score_age) 0.009702 0.09850 0.21
factor(gender)1 0.021562 0.14684 -0.01 0.16
factor(children)1 0.019324 0.13901 0.02 0.06 0.08
factor(married)1 0.009997 0.09999 0.10 0.21 0.51 0.23
factor(employed)1 0.007261 0.08521 -0.01 0.08 0.03 -0.26 -0.22
factor(education)1 0.014968 0.12234 -0.21 -0.03 0.07 -0.14 0.08 0.18
Residual 2.128571 1.45896
Number of obs: 148527, groups: country, 76
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 3.51893 0.04584 76.32698 76.762 < 2e-16 ***
scale(z_score_age) -0.22218 0.01240 74.85319 -17.911 < 2e-16 ***
hardship 0.18581 0.07032 75.24809 2.642 0.010 *
factor(gender)1 -0.34504 0.01899 67.55355 -18.174 < 2e-16 ***
factor(children)1 -0.20392 0.02022 73.26248 -10.085 1.71e-15 ***
factor(married)1 -0.13353 0.01556 62.33679 -8.582 3.73e-12 ***
factor(employed)1 0.01553 0.01336 68.46002 1.163 0.249
factor(education)1 0.12451 0.01877 51.11003 6.633 2.05e-08 ***
scale(z_score_age):hardship 0.10088 0.01967 73.52851 5.128 2.30e-06 ***
hardship:factor(gender)1 0.03238 0.02798 66.21324 1.157 0.251
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) sc(__) hrdshp fctr(g)1 fctr(c)1 fctr(mr)1 fctr(mp)1 fctr(d)1 s(__):
scl(z_scr_) 0.183
hardship 0.042 0.012
fctr(gndr)1 -0.056 0.153 -0.001
fctr(chld)1 -0.046 -0.060 0.003 0.009
fctr(mrrd)1 0.049 0.136 0.000 0.361 -0.070
fctr(mply)1 -0.063 0.099 0.003 0.089 -0.190 -0.146
fctr(dctn)1 -0.293 0.042 0.008 0.053 -0.069 0.038 0.059
scl(z_sc_): 0.015 0.037 0.201 0.004 0.012 -0.001 -0.034 -0.009
hrdshp:f()1 -0.003 0.012 -0.061 0.045 -0.008 -0.004 0.021 0.000 0.098
# Zusammenfassung des Modells anzeigen
summary_model3 <- summary(model3)
# Gewünschte Werte extrahieren und formatieren
results_model3 <- data.frame(
Predictor = c("Intercept", "Age", "Gender", "Parental status", "Marital status", "Occupational status", "Education", "Hardship", "Interaction: Gender * Hardship"),
Estimate = c(summary_model3$coefficients["(Intercept)", "Estimate"],
summary_model3$coefficients["scale(z_score_age)", "Estimate"],
summary_model3$coefficients["factor(gender)1", "Estimate"],
summary_model3$coefficients["factor(children)1", "Estimate"],
summary_model3$coefficients["factor(married)1", "Estimate"],
summary_model3$coefficients["factor(employed)1", "Estimate"],
summary_model3$coefficients["factor(education)1", "Estimate"],
summary_model3$coefficients["hardship", "Estimate"],
summary_model3$coefficients["hardship:factor(gender)1", "Estimate"]),
SE = c(summary_model3$coefficients["(Intercept)", "Std. Error"],
summary_model3$coefficients["scale(z_score_age)", "Std. Error"],
summary_model3$coefficients["factor(gender)1", "Std. Error"],
summary_model3$coefficients["factor(children)1", "Std. Error"],
summary_model3$coefficients["factor(married)1", "Std. Error"],
summary_model3$coefficients["factor(employed)1", "Std. Error"],
summary_model3$coefficients["factor(education)1", "Std. Error"],
summary_model3$coefficients["hardship", "Std. Error"],
summary_model3$coefficients["hardship:factor(gender)1", "Std. Error"]),
T_score = c(summary_model3$coefficients["(Intercept)", "t value"],
summary_model3$coefficients["scale(z_score_age)", "t value"],
summary_model3$coefficients["factor(gender)1", "t value"],
summary_model3$coefficients["factor(children)1", "t value"],
summary_model3$coefficients["factor(married)1", "t value"],
summary_model3$coefficients["factor(employed)1", "t value"],
summary_model3$coefficients["factor(education)1", "t value"],
summary_model3$coefficients["hardship", "t value"],
summary_model3$coefficients["hardship:factor(gender)1", "t value"]),
p_value = c(summary_model3$coefficients["(Intercept)", "Pr(>|t|)"],
summary_model3$coefficients["scale(z_score_age)", "Pr(>|t|)"],
summary_model3$coefficients["factor(gender)1", "Pr(>|t|)"],
summary_model3$coefficients["factor(children)1", "Pr(>|t|)"],
summary_model3$coefficients["factor(married)1", "Pr(>|t|)"],
summary_model3$coefficients["factor(employed)1", "Pr(>|t|)"],
summary_model3$coefficients["factor(education)1", "Pr(>|t|)"],
summary_model3$coefficients["hardship", "Pr(>|t|)"],
summary_model3$coefficients["hardship:factor(gender)1", "Pr(>|t|)"])
)
# Formatierung der p-Werte
results_model3$p_value <- ifelse(results_model3$p_value < 0.001, "< .001", sprintf("%.3f", results_model3$p_value))
# Ergebnisse anzeigen
print(results_model3)
anova(model0,model1)
refitting model(s) with ML (instead of REML)
Data: WVS_mixed_model
Models:
model0: risktaking ~ 1 + (1 | country)
model1: risktaking ~ 1 + scale(z_score_age) + factor(gender) + (1 + scale(z_score_age) + factor(gender) | country)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
model0 3 545328 545357 -272661 545322
model1 10 536124 536223 -268052 536104 9217.4 7 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
anova(model1,model2)
refitting model(s) with ML (instead of REML)
Data: WVS_mixed_model
Models:
model1: risktaking ~ 1 + scale(z_score_age) + factor(gender) + (1 + scale(z_score_age) + factor(gender) | country)
model2: risktaking ~ 1 + scale(z_score_age) + factor(gender) + factor(children) + factor(married) + factor(employed) + factor(education) + (1 + scale(z_score_age) + factor(gender) + factor(children) + factor(married) + factor(employed) + factor(education) | country)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
model1 10 536124 536223 -268052 536104
model2 36 534730 535087 -267329 534658 1446 26 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
anova(model2,model3)
refitting model(s) with ML (instead of REML)
Data: WVS_mixed_model
Models:
model2: risktaking ~ 1 + scale(z_score_age) + factor(gender) + factor(children) + factor(married) + factor(employed) + factor(education) + (1 + scale(z_score_age) + factor(gender) + factor(children) + factor(married) + factor(employed) + factor(education) | country)
model3: risktaking ~ 1 + scale(z_score_age) * hardship + factor(gender) * hardship + factor(children) + factor(married) + factor(employed) + factor(education) + (1 + scale(z_score_age) + factor(gender) + factor(children) + factor(married) + factor(employed) + factor(education) | country)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
model2 36 534730 535087 -267329 534658
model3 39 534717 535104 -267320 534639 18.9 3 0.0002868 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
coefsallmodels=rbind(summary_model1$coefficients,
summary_model2$coefficients,
summary_model3$coefficients[c(1:2,4:8,3,9:10),])
write.csv(coefsallmodels,"coefsallmodels.csv")
file_path <- file.path(getwd(), "coefsallmodels.csv")
file_path
[1] "/Users/laurabazzigher/Documents/GitHub/risk_wvs/code/coefsallmodels.csv"
# Extrahieren der Koeffizienten-Tabelle für jedes Modell
coefficients_model0 <- summary(model0)$coefficients
coefficients_model1 <- summary(model1)$coefficients
coefficients_model2 <- summary(model2)$coefficients
coefficients_model3 <- summary(model3)$coefficients
# Filtern der erforderlichen Zeilen aus den Koeffizienten
coefficients_model0 <- coefficients_model0[rownames(coefficients_model0) %in% c("(Intercept)", "scale(z_score_age)", "factor(gender)"), ]
coefficients_model1 <- coefficients_model1[rownames(coefficients_model1) %in% c("(Intercept)", "scale(z_score_age)", "factor(gender)"), ]
coefficients_model2 <- coefficients_model2[rownames(coefficients_model2) %in% c("(Intercept)", "scale(z_score_age)", "factor(gender)", "factor(children)", "factor(married)", "factor(employed)", "factor(education)"), ]
coefficients_model3 <- coefficients_model3[rownames(coefficients_model3) %in% c("(Intercept)", "scale(z_score_age)", "factor(gender)", "factor(children)", "factor(married)", "factor(employed)", "factor(education)", "hardship", "scale(z_score_age):hardship", "factor(gender):hardship"), ]
# Zusammenführen der geschätzten Koeffizienten aus allen Modellen
coefs_all_models <- rbind(coefficients_model0, coefficients_model1, coefficients_model2, coefficients_model3)
# Erstellen einer Tabelle aus den Koeffizienten
results_table <- data.frame(
Predictor = rownames(coefs_all_models),
b = coefs_all_models[, "Estimate"],
SE = coefs_all_models[, "Std. Error"],
T_score = coefs_all_models[, "t value"],
p_value = coefs_all_models[, "Pr(>|t|)"]
)
# Drucken der Ergebnistabelle
results_table